For one continuous variable = Numeric:
For one discrete variable = Factor:
library(ggplot2)
library(dplyr)##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
alpha, color, fill, linetype, size
# Simulated weights: 200 draws from N(55, 1) labelled "F", followed by
# 200 draws from N(58, 1) labelled "M". The seed is fixed so the draws are
# reproducible.
set.seed(1234)
wdata <- as_tibble(
  data.frame(
    sex = factor(rep(c("F", "M"), each = 200)),
    weight = c(rnorm(200, 55), rnorm(200, 58))
  )
)
wdata## # A tibble: 400 × 2
## sex weight
## <fctr> <dbl>
## 1 F 53.79293
## 2 F 55.27743
## 3 F 56.08444
## 4 F 52.65430
## 5 F 55.42912
## 6 F 55.50606
## 7 F 54.42526
## 8 F 54.45337
## 9 F 54.43555
## 10 F 54.10996
## # ... with 390 more rows
# Mean weight per sex (column grp.mean); used further down to draw one
# reference line per group.
mu <- wdata %>%
  group_by(sex) %>%
  summarize(grp.mean = mean(weight))
mu## # A tibble: 2 × 2
## sex grp.mean
## <fctr> <dbl>
## 1 F 54.94224
## 2 M 58.07325
a <- ggplot(wdata, aes(x = weight))
a + geom_area(stat = "bin", color = "black", fill = "#00AFBB")## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#a + geom_area() will not get right result, object 'y' not found. Use stat to specify the count as y
#Note that, by default y axis corresponds to the count of weight values. If you want to change the plot in order to have the density on y axis, the R code would be as follow.
a + geom_area(aes(y = ..density..), stat = "bin")## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Load the diamonds data set and make sure it is a tibble.
# as_tibble() is the supported replacement for the deprecated as_data_frame().
data("diamonds")
diamonds <- as_tibble(diamonds)
diamonds## # A tibble: 53,940 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.20 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
## 7 0.24 Very Good I VVS1 62.3 57 336 3.95 3.98 2.47
## 8 0.26 Very Good H SI1 61.9 55 337 4.07 4.11 2.53
## 9 0.22 Fair E VS2 65.1 61 337 3.87 3.78 2.49
## 10 0.23 Very Good H VS1 59.4 61 338 4.00 4.05 2.39
## # ... with 53,930 more rows
p <- ggplot(diamonds, aes(x = price, fill = cut))
# Bar plot
p + geom_bar(stat = "bin")## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Area plot
p + geom_area(stat = "bin")## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
alpha, color, fill, linetype, size
# Basic density plot
a + geom_density()

# Add color, plus vertical lines at the overall mean and median
a +
  geom_density(color = "black", fill = "gray") +
  geom_vline(
    aes(xintercept = mean(weight)),
    color = "#FC4E08", linetype = "dashed", size = 1
  ) +
  geom_vline(
    aes(xintercept = median(weight)),
    color = "blue", linetype = 4, size = 1
  )

# Change color by group
a + geom_density(aes(fill = sex), alpha = 0.4)

# Add mean lines and color by sex
a +
  geom_density(aes(fill = sex), alpha = 0.4) +
  geom_vline(
    data = mu,
    aes(xintercept = grp.mean, color = sex),
    linetype = "dashed"
  )

# Change manually
# Change line colors manually.
# a2 is the shared base plot; each line below applies a different color scale.
a2 <- a +
  geom_density(aes(color = sex)) +
  geom_vline(
    data = mu,
    aes(xintercept = grp.mean, color = sex),
    linetype = "dashed"
  ) +
  theme_minimal()
a2 + scale_color_manual(values = c("#999999", "#E69F00"))
a2 + scale_color_brewer(palette = "Paired")
a2 + scale_color_grey()

# Change fill colors manually.
# a3 is the shared base plot; each line below applies a different fill scale.
a3 <- a +
  geom_density(aes(fill = sex), alpha = 0.4) +
  theme_minimal()
a3 + scale_fill_manual(values = c("#999999", "#E69F00"))
a3 + scale_fill_brewer(palette = "Dark2")
a3 + scale_fill_grey()

# Position adjustments: identity (position_identity()), stack (position_stack()),
# dodge (position_dodge()). The default value is "stack".
alpha, color, fill, linetype, size
# Basic plot
# Without `bins`/`binwidth`, stat_bin() uses 30 bins and prints
# "`stat_bin()` using `bins = 30`. Pick better value with `binwidth`."
# 30 bins might not be a good default; change the number of bins
# (e.g. bins = 50) or the bin width (e.g. binwidth = 0.5).
a + geom_histogram()
a + geom_histogram(bins = 50)

# Outline/fill color plus a dashed line at the overall mean
a +
  geom_histogram(bins = 50, color = "black", fill = "grey") +
  geom_vline(
    aes(xintercept = mean(weight)),
    color = "#FC4E07", linetype = "dashed", size = 1
  ) +
  theme_minimal()

# Density instead of count on the y axis
a + geom_histogram(aes(y = ..density..), bins = 50)

# Change color by sex
a +
  geom_histogram(aes(color = sex), fill = "white", bins = 50) +
  theme_minimal()

# Position adjustment "identity" (overlaid)
a + geom_histogram(
  aes(color = sex),
  fill = "white", bins = 50, alpha = 0.6, position = "identity"
)

# Position adjustment "dodge" (interleaved)
# Add mean lines and color by sex: one dashed line per group, taken from the
# per-sex means in `mu` (the overall mean(weight) would give a single,
# uncolored line).
a +
  geom_histogram(
    aes(color = sex),
    fill = "white", alpha = 0.6, position = "dodge", bins = 50
  ) +
  geom_vline(
    data = mu,
    aes(xintercept = grp.mean, color = sex),
    linetype = "dashed"
  )

# Change fill, color manually
# Change outline color manually
a +
  geom_histogram(
    aes(color = sex),
    fill = "white", alpha = 0.4, position = "identity", bins = 50
  ) +
  scale_color_manual(values = c("#00AFBB", "#E7B800"))

# Change fill and outline color manually
# Wrong command, kept for reference: fill is fixed to "white" instead of being
# mapped to the group, so scale_fill_manual() has nothing to act on.
# a + geom_histogram(aes(color = sex), fill = "white", alpha =0.4, position = "identity", bins = 50) + scale_fill_manual(values = c("#00AFBB", "#E7B800")) + scale_color_manual(values = c("#00AFBB", "#E7B800"))
# Correct: map fill to the group first.
a +
  geom_histogram(
    aes(color = sex, fill = sex),
    alpha = 0.4, position = "identity", bins = 50
  ) +
  scale_fill_manual(values = c("#00AFBB", "#E7B800")) +
  scale_color_manual(values = c("#00AFBB", "#E7B800"))

## Combine Histogram and Density Plots
# Plot histogram with density values on y-axis(instead of count values).
# Add density plot with transparent density plot
# Histogram with density plot
a + geom_histogram(aes(y = ..density..),color = "black", fill = "white") + geom_density(alpha = 0.2, fill = "#FF6666") + theme_minimal()## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Color by groups
a + geom_histogram(aes(y = ..density.., color = sex, fill = sex), alpha = 0.4, position = "identity") + geom_density(aes(color = sex), size =1)## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Very close to histogram plots
alpha, color, linetype, size
# Basic plot
a + geom_freqpoly(bins = 30) + theme_minimal()# Change color and linetype by sex
# Use custom color palettes
a + geom_freqpoly(aes(color = sex, linetype = sex), bins = 30 ) + scale_color_manual(values = c("#999999", "#E69F00"))+theme_minimal()# y density
a + geom_freqpoly(aes(y = ..density.., color = sex, linetype = sex), bins = 30 ) + scale_color_manual(values = c("#999999", "#E69F00"))+theme_minimal()Not suitable for one variable, it’s ugly.
a + geom_dotplot(aes(fill = sex))## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
Empirical Cumulative Distribution Function (ECDF)
alpha, color, linetype, size
# ECDF drawn with points
a + stat_ecdf(geom = "point")
# ECDF drawn as steps
a + stat_ecdf(geom = "step")

# Quantile-quantile (QQ) plots are used to check whether a given data set
# follows a normal distribution.
alpha, color, shape, size
data(mtcars)
mtcars <- as_data_frame(mtcars)
mtcars## # A tibble: 32 × 11
## mpg cyl disp hp drat wt qsec vs am gear carb
## * <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## 2 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## 3 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## 4 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## 5 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## 6 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## 7 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## 8 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## 9 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## 10 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## # ... with 22 more rows
mtcars <- mutate(mtcars, cyl = as.factor(cyl))
mtcars## # A tibble: 32 × 11
## mpg cyl disp hp drat wt qsec vs am gear carb
## <dbl> <fctr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## 2 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## 3 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## 4 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## 5 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## 6 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## 7 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## 8 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## 9 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## 10 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## # ... with 22 more rows
p <- ggplot(mtcars, aes(sample = mpg))
# Basic plot
p + stat_qq()# Change point shapes by groups
# Use custom color palettes
p + stat_qq(aes(shape = cyl, color = cyl)) + scale_color_manual(values = c("#00AFBB", "#E7B800", "#FC4E07"))For one discrete variable
alpha, color, fill, linetype, size
data(mpg)
mpg <- as_data_frame(mpg)
mpg## # A tibble: 234 × 11
## manufacturer model displ year cyl trans drv cty hwy
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int>
## 1 audi a4 1.8 1999 4 auto(l5) f 18 29
## 2 audi a4 1.8 1999 4 manual(m5) f 21 29
## 3 audi a4 2.0 2008 4 manual(m6) f 20 31
## 4 audi a4 2.0 2008 4 auto(av) f 21 30
## 5 audi a4 2.8 1999 6 auto(l5) f 16 26
## 6 audi a4 2.8 1999 6 manual(m5) f 18 26
## 7 audi a4 3.1 2008 6 auto(av) f 18 27
## 8 audi a4 quattro 1.8 1999 4 manual(m5) 4 18 26
## 9 audi a4 quattro 1.8 1999 4 auto(l5) 4 16 25
## 10 audi a4 quattro 2.0 2008 4 manual(m6) 4 20 28
## # ... with 224 more rows, and 2 more variables: fl <chr>, class <chr>
ggplot(mpg, aes(fl)) + geom_bar(fill = "steelblue") + theme_minimal()geom_point
alpha, color, fill, shape, size
# Data format
mtcars## # A tibble: 32 × 11
## mpg cyl disp hp drat wt qsec vs am gear carb
## <dbl> <fctr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## 2 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## 3 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## 4 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## 5 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## 6 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## 7 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## 8 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## 9 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## 10 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## # ... with 22 more rows
# Base scatter plot: x = wt (weight), y = mpg (miles/gallon)
b <- ggplot(mtcars, aes(x = wt, y = mpg))

# Basic scatter plots
b + geom_point(color = "#00AFBB")

# Change the point size, and shape
b + geom_point(color = "#00AFBB", size = 2, shape = 23)

# Control point size by continuous variable values
# qsec: 1/4 mile time
b + geom_point(aes(size = qsec), color = "#00AFBB")

# Label text
# `mtcars` was converted to a tibble and passed through mutate() above; the
# printed tibble no longer shows row names, so rownames(mtcars) would give
# "1", "2", ... instead of the car names. Take the labels from the original
# data set instead (neither step reorders rows).
b +
  geom_point() +
  geom_text(label = rownames(datasets::mtcars), nudge_y = 0.8)

# Change shape, color, size automatically
# Change point shape by the level of cyl
b + geom_point(aes(shape = cyl))

# Change point shape and colors
b + geom_point(aes(color = cyl, shape = cyl))

# Change shape, color, size manually
# Change the point sizes manually
b +
  geom_point(aes(color = cyl, shape = cyl, size = cyl)) +
  scale_size_manual(values = c(2, 3, 4))

# Change the point shapes and colors manually
b +
  geom_point(aes(color = cyl, shape = cyl)) +
  scale_shape_manual(values = c(3, 16, 17)) +
  scale_color_manual(values = c('#999999','#E69F00', '#56B4E9'))

# Use brewer color palettes
b +
  geom_point(aes(color = cyl, shape = cyl)) +
  scale_color_brewer(palette = "Dark2") +
  theme_minimal()

# Use grey scale
b +
  geom_point(aes(color = cyl, shape = cyl)) +
  scale_color_grey() +
  theme_minimal()

#####################################################
##Add regression line or smoothed conditional mean###
#####################################################
#geom_smooth(), geom_abline()
#alpha, color, fill, shape, linetype, size
#geom_smooth(method = "auto")
#method:loess->local regression, lm-> linear regression
# Add regression line
b + geom_point() + geom_smooth(method = lm)# Point + regression line
# Remove the confidence interval
b + geom_point() + geom_smooth(method = lm, se = FALSE)# loess method, local regression fitting
b + geom_point() + geom_smooth()## `geom_smooth()` using method = 'loess'
# Change the color and shape by groups
b + geom_point(aes(color = cyl, shape = cyl)) + geom_smooth(aes(color = cyl, fill = cyl), method = lm)# Remove confidence intervals
# Extend the regression lines: fullrange
b + geom_point(aes(color = cyl, shape = cyl)) + geom_smooth(aes(color = cyl), method = lm, se = FALSE, fullrange = TRUE)# Add marginal rugs to a scatter plot
#geom_rug(sides = "bl")
# sides: a string, "trbl", top, right, bottom, left.
# Add marginal rugs
b + geom_point() + geom_rug()# Change the color by group
b + geom_point(aes(color = cyl)) + geom_rug(aes(color = cyl))# Add marginal rugs using faithful data
data(faithful)
faithful <- as_data_frame(faithful)
faithful## # A tibble: 272 × 2
## eruptions waiting
## * <dbl> <dbl>
## 1 3.600 79
## 2 1.800 54
## 3 3.333 74
## 4 2.283 62
## 5 4.533 85
## 6 2.883 55
## 7 4.700 88
## 8 3.600 85
## 9 1.950 51
## 10 4.350 85
## # ... with 262 more rows
ggplot(faithful, aes(x = eruptions, y = waiting)) + geom_point() + geom_rug()# Jitter points to reduce overplotting
# geom_jitter(), position_jitter()
#alpha, color, fill, shape, size
# Use mpg data
p <- ggplot(mpg, aes(displ, hwy))
# Default scatter plot
p + geom_point()

# Use jitter to reduce overplotting
p + geom_jitter(position = position_jitter(width = 0.5, height = 0.5))

# Rows with displ == 1.9, highest hwy first
select(mpg, displ, hwy) %>%
  arrange(-hwy) %>%
  filter(displ == 1.9)
## # A tibble: 3 × 2
## displ hwy
## <dbl> <int>
## 1 1.9 44
## 2 1.9 44
## 3 1.9 41
##
#Text annotation
#geom_text()
#label, alpha, angle, color, family, fontface, hjust, lineheight, size, vjust
# Label every point with its car name.
# The tibble `mtcars` built above no longer shows row names in its printed
# output, so the names are read from the original data set (same row order).
b + geom_text(aes(label = rownames(datasets::mtcars)), size = 3)

# Base plot for the 2D binning examples: carat on x, price on y
c <- ggplot(diamonds, aes(carat, price))
# Add heatmap of 2d bin counts
# geom_bin2d produce a scatter plot with rectangular bins.
# stat_bin_2d(), stat_summary_2d()
# xmax, xmin, ymax, ymin, alpha, color, fill, linetype, size
c + geom_bin2d()

# Change the number of bins
c + geom_bin2d(bins = 15)

# Specify the width of bins
c + geom_bin2d(binwidth = c(1, 1000))

# stat_* variants
c + stat_bin_2d()
c + stat_summary_2d(aes(z = depth))

# Add hexagon binning
#geom_hex()
# stat_bin_hex(), stat_summary_hex()
# alpha, color, fill, size
# Attach hexbin with library() so a missing package stops the script here with
# an error; require() would only return FALSE and let the failure surface
# later.
library(hexbin)
c + geom_hex()

# Change the number of bins
c + geom_hex(bins = 10)

# stat_* variants
c + stat_bin_hex()
c + stat_summary_hex(aes(z = depth))

# 2D density estimation
# geom_density_2d()
# stat_density_2d()
# alpha, color, linetype, size
# Scatter plot
sp <- ggplot(faithful, aes(x = eruptions, y = waiting))
select(faithful, eruptions, waiting)## # A tibble: 272 × 2
## eruptions waiting
## * <dbl> <dbl>
## 1 3.600 79
## 2 1.800 54
## 3 3.333 74
## 4 2.283 62
## 5 4.533 85
## 6 2.883 55
## 7 4.700 88
## 8 3.600 85
## 9 1.950 51
## 10 4.350 85
## # ... with 262 more rows
# Default plot
sp + geom_density_2d(color = "#E7B800")# Add points
sp + geom_point(color = "#00AFBB") + geom_density_2d(color = "#E7B800")# Use stat_density_2d with geom = "polygon"
sp + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon")# Change the gradient color
sp + geom_point() + stat_density_2d(aes(fill = ..level..), geom = "polygon") + scale_fill_gradient(low = "#00AFBB", high = "#FC3E07")# Gradientgeom_jitter
alpha, color, fill, shape, size
ggplot(diamonds, aes(cut, color)) + geom_jitter(aes(color = cut), size = 0.5)select(diamonds, cut, color)## # A tibble: 53,940 × 2
## cut color
## <ord> <ord>
## 1 Ideal E
## 2 Premium E
## 3 Good E
## 4 Premium I
## 5 Good J
## 6 Very Good J
## 7 Very Good I
## 8 Very Good H
## 9 Fair E
## 10 Very Good H
## # ... with 53,930 more rows
# Load ToothGrowth, turn it into a tibble and make dose a factor so it can be
# used as a discrete x axis / grouping variable.
# as_tibble() replaces the deprecated as_data_frame().
data("ToothGrowth")
ToothGrowth <- ToothGrowth %>%
  as_tibble() %>%
  mutate(dose = as.factor(dose))
ToothGrowth## # A tibble: 60 × 3
## len supp dose
## <dbl> <fctr> <fctr>
## 1 4.2 VC 0.5
## 2 11.5 VC 0.5
## 3 7.3 VC 0.5
## 4 5.8 VC 0.5
## 5 6.4 VC 0.5
## 6 10.0 VC 0.5
## 7 11.2 VC 0.5
## 8 11.2 VC 0.5
## 9 5.2 VC 0.5
## 10 7.0 VC 0.5
## # ... with 50 more rows
# Base plot for the box/violin/dot/strip examples: len by dose
e <- ggplot(ToothGrowth, aes(x = dose, y = len))

# Aesthetics: alpha, color, linetype, shape, size, fill
# Key arguments (shown for reference only, not a plot on its own):
#   geom_boxplot(outlier.colour = "black", outlier.shape = 16,
#                outlier.size = 2, notch = FALSE)

# Basic box plot
e + geom_boxplot()

# Rotate the box plot
e + geom_boxplot() + coord_flip()

# Notched box plot
e + geom_boxplot(notch = TRUE)

# Box plot with mean points
# NOTE(review): `fun.y` is kept for compatibility with the ggplot2 version this
# script was written against; newer releases prefer `fun` - confirm the target
# version before switching.
e +
  geom_boxplot() +
  stat_summary(
    fun.y = mean, geom = "point",
    shape = 18, size = 4, color = "blue"
  )

# Choose which items to display
# Rows of the omitted level are dropped with a warning
# ("Removed 20 rows containing non-finite values (stat_boxplot).").
e + geom_boxplot() + scale_x_discrete(limits = c("0.5", "2"))

# Change default order of items
e + geom_boxplot() + scale_x_discrete(limits = c("2", "0.5", "1"))

# The argument is spelled `coef`; the previous `coeff = 1.5` was ignored with
# "Warning: Ignoring unknown parameters: coeff".
e + stat_boxplot(coef = 1.5)
# change the color by group
# box plot outline and fill colors can be automatically controlled by the levels of the grouping variable *dose*
# Use single color
e + geom_boxplot(color = "black", fill = "steelblue")# Change outline colors by dose (groups)
e + geom_boxplot(aes(color = dose))# Change the fill color by dose (groups)
e + geom_boxplot(aes(fill = dose))# Change munually outline colors:
# Use custom color palettes
e2 <- e + geom_boxplot(aes(color = dose)) + theme_minimal()
e2 + scale_color_manual(values = c("#999999", "#E69F00", "#56B4E9"))# Use brewer color palettes
e2 + scale_color_brewer(palette = "Dark2")# Use grey scale
e2 + scale_color_grey()## Change manually by fill color
# Use the custom color palettes
e3 <- e + geom_boxplot(aes(fill = dose)) + theme_minimal()
e3 + scale_fill_manual(values = c("#999999", "#E69F00", "#56B4E9"))# Use brewer color palettes
e3 + scale_fill_brewer(palette = "Dark2")# Use grey color
e3 + scale_fill_grey()## Boxplot with multiple groups
#The grouping variable *dose* and *supp* are used:
# Change box plot colors by groups
e + geom_boxplot(aes(fill = supp))

# Change the position
e + geom_boxplot(aes(fill = supp), position = position_dodge(1.1))

# Change the fill color
# The palette has to be passed as `palette =`; a bare first argument is not
# used as the palette (presumably it ends up as the scale name - see the
# scale_fill_brewer() reference).
e +
  geom_boxplot(aes(fill = supp), position = position_dodge(1.1)) +
  scale_fill_brewer(palette = "BrBG")

# Violin plots are similar to box plots, except that they also show the kernel
# probability density of the data at different values. Typically, violin plots
# include a marker for the median of the data and a box indicating the
# interquartile range, as in standard box plots.
alpha, color, fill, linetype and size
# Basic plot
e + geom_violin()# Rotate the violin plot
e + geom_violin() + coord_flip()# Set trim argument to FALSE
e + geom_violin(trim = FALSE, fill = "steelblue")## Add summary statistics
# Function stat_summary can be used to add mean/median points and more on a violin plot
# Add mean and median points: use fun.y = mean or fun.y = median
e + geom_violin(trim = FALSE) + stat_summary(fun.y = mean, geom = "point", shape = 23, size = 2, color = "blue")# Add mean points +/- SD
# Use geom = "pointrange" or geom = "crossbar"
e + geom_violin(trim = FALSE) + stat_summary(fun.data = "mean_sdl", fun.args = list(mult = 1), geom = "pointrange", color = "red")# The function mean_sdl is used for adding mean and standard deviation.
# It computes the mean plus or minus a constant times the standard deviation. The constant is specified using the argument mult (mult = 1). Default mult = 2.
# The mean +/- SD can be added as crossbar or a pointrange.
# Combine with box plot to add median and quartiles
e + geom_violin(trim = FALSE) + geom_boxplot(width = 0.2)## Change colors by groups
# The color and fill can be automatically controlled by the levels of the grouping variable dose
# Change the outline colors by dose (groups)
e + geom_violin(aes(color = dose), trim = FALSE)# Change the fill color by dose
e + geom_violin(aes(fill = dose), trim = FALSE)# Change outline and fill color manually.
e2 <- e + geom_violin(aes(color = dose), trim = FALSE) + theme_minimal()
e2 + scale_color_brewer(palette = "Dark2")# Change manually fill colors
e3 <- e + geom_violin(aes(fill = dose), trim = FALSE) + theme_minimal()
e3 + scale_fill_brewer(palette = "Dark2")## Violin plot with multiple groups
# Change the color by groups
e + geom_violin(aes(fill = supp), trim = FALSE)# Change fill colors
e + geom_violin(aes(fill = supp), trim = FALSE) + scale_fill_brewer(palette = "Dark2")geom_dotplot(), stat_summary()
alpha, color, dotsize and fill
#Basic dot plot
e + geom_dotplot(binaxis ="y", stackdir = "center")## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
# Change dotsize and stack ratio
e + geom_dotplot(binaxis = "y", stackdir = "center", stackratio = 1.5, dotsize = 1.1)## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
# stat_summary can be used to add mean/median points and more on a violin plot
# Add mean and median points: use fun.y = mean or fun.y = median
e + geom_dotplot(binaxis = "y", stackdir = "center") + stat_summary(fun.y = mean, geom = "point", shape = 18, size = 3, color = "red")## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
# Add mean points with +/- SD
# Use geom = "pointrange" or geom = "crossbar"
e + geom_dotplot(binaxis = "y", stackdir = "center") + stat_summary(fun.data = "mean_sdl", fun.args = list(mult = 1), geom = "pointrange", color = "red")## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
## Combine with box plot and dot plot:
# Combine with boxplot
e + geom_boxplot() + geom_dotplot(binaxis = "y", stackdir ="center")## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
# Combine with violin plot
e + geom_violin(trim = FALSE) + geom_dotplot(binaxis = "y", stackdir ="center")## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
# Dotplot + violin plot + stat summary
e + geom_violin(trim = FALSE) + geom_dotplot(binaxis = "y", stackdir ="center") + stat_summary(fun.data = "mean_sdl", fun.args = list(mult = 1), geom = "pointrange", color = "red", shape = 11)## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
# Use scales to change the outline and fill colors, which are automatically controlled by the levels of the grouping variable dose
# scale_color_manual(), scale_color_brewer(), scale_color_grey()
# scale_fill_manual(), scale_fill_brewer(), scale_fill_grey()
e + geom_dotplot(binaxis = "y", stackdir = "center", aes(color = dose), fill = "white") + theme_minimal()## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
## dotplot with multiple groups
# just like boxplot and violin plot
# Stripcharts are also known as one-dimensional scatter plots. They are a good alternative to box plots when sample sizes are small.
geom_jitter(), stat_summary()
alpha, color, size and fill
e + geom_jitter()# Change the position
# 0.2 is the degree of jitter in x direction
e + geom_jitter(position = position_jitter(0.2))# Change point shapes and size
e + geom_boxplot()+ geom_jitter(position = position_jitter(0.2), shape = 11, size = 1.2)# Add summary statistics
# Add mean or median point
e + geom_jitter(position = position_jitter(0.2)) + stat_summary(fun.y = mean, geom = "point", shape = 18, size = 3, color = "red")# use geom = "pointrange"
e + geom_jitter(position = position_jitter(0.2)) + stat_summary(fun.data = "mean_sdl", fun.args = list(mult = 1), shape = 18, color = "red")# Combine with boxplot and violin plot
e + geom_violin(trim = FALSE) + geom_jitter(position = position_jitter(0.1)) + stat_summary(fun.data = "mean_sdl", fun.args = list(mult = 1), shape = 18, color = "red")# Change point shape by group
e + geom_jitter(aes(shape = dose), position = position_jitter(0.2)) + scale_shape_manual(values = c(1,17,19))# Change color by groups
e + geom_jitter(aes(color = dose, shape = dose), position = position_jitter(0.2)) + theme_minimal()# Change the outlien and fill color by scale
##Stripchar with multiple groups
#Change colors and shapes by groups
e + geom_jitter(aes(color = supp, shape = supp), position = position_jitter(0.2))# Add boxplot
e + geom_boxplot(aes(color = supp), position = position_dodge()) + geom_jitter(aes(color = supp, shape = supp), position = position_jitter(0.2)) + theme_minimal()In a line graph, observations are ordered by x value and connected.
x value can be:
geom_line(), geom_path(), geom_step()
alpha, color, linetype and size
# Single-series example data: one len value per dose label.
df <- data.frame(
  dose = c("D0.5", "D1", "D2"),
  len = c(4.2, 10, 29.5)
)

# Two-series example data: the three dose labels repeated for each supp value
# ("VC" rows first, then "OJ").
df2 <- data.frame(
  supp = rep(c("VC", "OJ"), each = 3),
  dose = rep(c("D0.5", "D1", "D2"), times = 2),
  len = c(6.8, 15, 33, 4.2, 10, 29.5)
)
p<- ggplot(data = df, aes(x = dose, y = len, group = 1))
p + geom_line() + geom_point()# Change the line color and line type
p + geom_line(linetype = "dashed", color = "steelblue") + geom_point(color = "steelblue")# use geom_step()
p + geom_step() + geom_point()# use paht
p + geom_path() # Line plot with multiple groups
# line tpye and point shape automatically controlled by groups.
p <- ggplot(df2, aes(x = dose, y= len, group = supp))
p + geom_line(aes(linetype = supp)) + geom_point(aes(shape = supp))# Change the line type, point shapes and colors
p + geom_line(aes(linetype = supp, color = supp)) + geom_point(aes(shape = supp, color = supp)) + scale_color_brewer(palette = "Dark2")# X-axis is date; use economics
head(economics)## # A tibble: 6 × 6
## date pce pop psavert uempmed unemploy
## <date> <dbl> <int> <dbl> <dbl> <int>
## 1 1967-07-01 507.4 198712 12.5 4.5 2944
## 2 1967-08-01 510.5 198911 12.5 4.7 2945
## 3 1967-09-01 516.3 199113 11.7 4.6 2958
## 4 1967-10-01 512.9 199311 12.5 4.9 3143
## 5 1967-11-01 518.1 199498 12.5 4.7 3066
## 6 1967-12-01 525.8 199657 12.1 4.8 3018
ggplot(data = economics, aes(x = date, y = pop)) + geom_line()# subset data
ss <- subset(economics, date > as.Date("2006-1-1"))
ggplot(data = ss, aes(x = date, y = pop)) + geom_line()# line size
ggplot(data = economics, aes(x = date, y = pop, size = unemploy/ pop)) + geom_line()# multiple time series data:
# Solution 1
# One geom_line() per series (psavert and uempmed) on a shared date axis.
# Both colors are fixed values, so they are set outside aes(); inside aes()
# the string "darkred" is treated as data to be mapped through the color
# scale rather than as the color of the line.
ggplot(economics, aes(x = date)) +
  geom_line(aes(y = psavert), color = "darkred") +
  geom_line(aes(y = uempmed), color = "steelblue", linetype = "twodash") +
  theme_minimal()
# Solution 2: melt by date
# Area plot
ggplot(economics, aes(x = date)) + geom_area(aes(y = psavert), fill = "#999999", color = "#999999", alpha = 0.5) + geom_area(aes(y = uempmed), fill = "#E69F00", color = "#E69F00", alpha = 0.5) + theme_minimal()geom_bar()
alpha, color, fill, linetype and size
df <- data.frame(dose = c("D0.5", "D1", "D2"), len = c(4.2,10, 29.5))
df2 <- data.frame(supp = rep(c("VC", "OJ"), each = 3), dose = rep(c("D0.5", "D1", "D2"),2 ), len = c(6.8, 15, 33, 4.2, 10, 29.5))
f <- ggplot(df, aes(x = dose, y = len))
f + geom_bar(stat = "identity")#Change fill color and add labels at the top
f + geom_bar(stat= "identity", fill = "steelblue") + geom_text(aes(label = len), vjust = -0.3, size = 3.5) + theme_minimal()f + geom_bar(stat= "identity", fill = "steelblue") + geom_text(aes(label = len), vjust = 1.6, size = 3.5, color = "white") + theme_minimal() + scale_x_discrete(limits = c("D2", "D0.5", "D1"))# change the color by groups
f + geom_bar(aes(color = dose), stat = "identity", fill = "white")#bar plot with multiple groups
g <- ggplot(data =df2, aes(x = dose, y = len, fill = supp))
# Statcked bar plot
g + geom_bar(stat = "identity")# Use position = position_dodge()
g +
  geom_bar(stat = "identity", position = position_dodge()) +
  geom_text(
    aes(label = len),
    vjust = 1.6, color = "white",
    position = position_dodge(0.9), size = 3.5
  )

# Stacked bar plot with one label per segment.
# The label positions are computed with dplyr only. plyr is no longer attached
# here: loading it after dplyr masks arrange(), mutate(), summarize() and
# others. The verbs are namespace-qualified so this block also works if plyr
# is attached elsewhere.
library(dplyr)
df_sorted <- dplyr::arrange(df2, dose, supp)

# position_stack() stacks groups in reverse order, so within each dose the
# first supp level sits on top of the later ones. The top edge of a segment is
# therefore its own len plus the len of every later supp level in the same
# dose, i.e. a cumulative sum taken from the last row backwards.
# NOTE(review): this matches the documented position_stack() behaviour; very
# old ggplot2 releases stacked in the opposite order - confirm the target
# version.
df_cumsum <- df_sorted %>%
  dplyr::group_by(dose) %>%
  dplyr::mutate(label_ypos = rev(cumsum(rev(len)))) %>%
  dplyr::ungroup() %>%
  as.data.frame()

# Create the bar plot
ggplot(data = df_cumsum, aes(x = dose, y = len, fill = supp)) +
  geom_bar(stat = "identity") +
  geom_text(
    aes(label = len, y = label_ypos),
    vjust = 1.6, color = "white", size = 3.5
  )